//produce db relating to job orders approved by SLBFE  

clear
set memory 500m
set matsize 100
set more off 
# delimit;

* Set directory; 

cd "~/Desktop/migrec_replication/do/";


** append job-order datasets;

	foreach i of numlist 6/9 {;
		insheet using ../raw_secure/job-order-0`i'.csv, comma clear;
		save ../dta_secure/job-order-`i'.dta, replace;
	};

	foreach i of numlist 10/16 {;
		insheet using ../raw_secure/job-order-`i'.csv, comma clear;
		save ../dta_secure/job-order-`i'.dta, replace;
	};

	use ../dta_secure/job-order-6.dta,  clear;

	foreach i of numlist 7/16 {;
		append using ../dta_secure/job-order-`i'.dta, force;
	};

** STEP 1: figure out dates;

	gen jo_yr = substr(jo_index, 1, 4);
	gen apr_yr = substr(aprvno, -2, 2);
	destring jo_yr, replace;
	destring apr_yr, replace force;
	replace apr_yr = 2000 + apr_yr;

** STEP 2: figure out sectors;

	rename jobcode job_code;
	merge m:1 job_code using ../dta_secure/job_codes.dta, gen(jobcodemerge);
	keep if jobcodemerge!= 2;


** STEP 3: Convert currencies ;

* note, does not account for inflation;

	gen cuunit = currency1;
	merge m:1 cuunit using ../dta_secure/currencies.dta, gen(currencymerge);
	keep if currencymerge != 2;
	gen usd_minsalary = ((minsalary*cn_currency_rate)/132.9);
	gen usd_maxsalary = ((maxsalary*cn_currency_rate)/132.9);
	
	gen departure_year = jo_yr; 
	tostring departure_year, replace; 
	gen cuunit_anf = cuunit; 
	
	
	merge m:1 cuunit_anf departure_year using ../dta_secure/currency_deflated_cuunit.dta, gen(dcur_merge); 
	keep if dcur_merge != 2; 
	
	drop departure_year cuunit_anf; 
	
	gen usd_minsalary_d = minsalary/real_value;
	gen usd_maxsalary_d = maxsalary/real_value;


** ADD ANF JOB SECTOR CLASSIFICATIONS;

	merge m:1 jobcode_jobsum using ../res_secure/jobcode_classification_anf.dta, gen(jobcode_class_anf);

** CLEAN DATA;

* generate license no for match with agency data; 
	
	g license_no = substr(aprvno,4,4);	

* make approval numbers all upper case;

	replace aprvno = upper(aprvno);
				
* encode string vars;

	foreach x in sex food accom ticket medical {; 
		encode `x', gen(`x'_tmp);
		drop `x' ;
		rename `x'_tmp apr_`x';
	};
		
	foreach x in jobcode_sector jobcode_area jobcode_type jobcode_gender 
	jobcode_cess_cat jobcode_jobsum jobcode_skill_anf {;
		encode `x', gen(`x'_tmp);
		drop `x' ;
		rename `x'_tmp `x';
	};
		
	replace apr_food = 1 if apr_food == 10;
	lab define foodcode 1 "Allowance" 3 "Free" 5 "Not provided";
	lab define accomcode 1 "Allowance" 3 "Free" 6 "Not provided";
	lab value apr_food foodcode;
	lab value apr_accom accomcode;
		
	g apr_ticket_return = (apr_ticket == 8 | apr_ticket == 11);
	g apr_food_free = (apr_food == 3);
	g apr_accom_free = (apr_accom == 3);
	
	replace jobcode_type = 5 if jobcode_type == 6;
	replace jobcode_gender = 3 if jobcode_gender == 4;
		
	lab define jobsec 1 "Construction" 2 "Domestic" 3 "Garments" 4 "Hospitality" 5 "N/A";
	lab values jobcode_sector jobcode_type jobsec;
		
	lab define sexcode 1 "Female" 2 "Male" 3 "Either";
	lab values jobcode_gender apr_sex sexcode;
		
	lab define skillcode 1 "Professional" 2 "Skilled" 3 "Semi-skilled" 4 "Middle level" 
		5 "Clerical and other" 6 "Unskilled" 7 "Housemaid";
	lab values jobcode_man_lev skillcode;
	rename jobcode_man_lev jobcode_skill;
		
	rename jobcode_skill_anf jobcode_skill_lev;
		
	lab define relhouse 1 "HM -Muslim domestic worker" 2 "HN -Non-muslim domestic worker";
	lab values jobcode_cess_cat relhouse;
				
	rename currency1 currency;
	rename reqvacancyno  num_vac_req;
	rename vacancyno	num_vac_app;
	rename sent num_vac_sent;
	rename jobcode_jobsum jobcode_jtitle;
			
	drop cuunit;
		
* zero obs - drop;
	drop airticket jobapp sal_pd ot_ph nm_hours sent1 min_hours	mon_income ;
	drop jobcode_minor jobcode_uti jobcodemerge;
		
* label var ;
	la var apr_sex "Sex of worker sought";
	la var apr_food "T&C of job order (food)";
	la var apr_accom "T&C of job order (accomodations)";
	la var apr_ticket "T&C of job order (plane ticket)";
	la var apr_medical "T&C of job order (health insurance)";
	la var apr_food_free "T&C of job order- free food";
	la var apr_accom_free "T&C of job order- free accomodations";
	la var apr_ticket_return "T&C of job order- return plane ticket";
	la var minsalary "Min salary on offer";
	la var maxsalary "Max salary on offer";
	la var usd_minsalary_d "Min salary on offer - 2015 dollars";
	la var usd_maxsalary_d "Max salary on offer - 2015 dollars";
	la var currency "Currency of salary";
	la var num_vac_app "Number of vacancies approved";
	la var num_vac_req "Number of vacancies requested";
	la var num_vac_sent "Number of vacancies filled";
	la var maxamount "Max amount chargeable to local agency";
	la var expdate "Expiry date of job order validity";
	la var jo_yr "Job order year (from jo_index)";
	la var apr_yr "Order approval year (from aprvno)" ;

	* unknown variables * find out what they are *
	* sal_type sal_ph allowance	edt_sysdt	prino	ent_sysdt	pri nospolicer	jo_index1	medicalno
	* japrvno	pr_aprvno jobcode_major;
	
	order id license_no apr_yr aprvno jo_index job_code minsalary;
	sort aprvno license_no apr_yr jo_index;
		
	egen aprvtag = tag(aprvno); //tag approval number
	g year = apr_yr ;
	la var year "Year of approval";
	
** SAVE JOB ORDER;
save ../dta_secure/job_order.dta, replace; 

	// 	This dataset is at the firm X job order X index X year level. That is,
	//	for a given firm we observe a job order (which is in a given year) and
	//	all the different vac requests within the order.

*******************************************************************************;
** JOB ORDER LEVEL DATA - this generates a unique job order level dataset;
	
	use ../dta_secure/job_order.dta, clear; 
	
	* vac types in job order; 
	
		forval i = 1/7 {;
			g num_vac_req_skilltype`i' = num_vac_req if (jobcode_skill==`i');
			g num_vac_app_skilltype`i' = num_vac_app if (jobcode_skill==`i');
			g num_vac_sent_skilltype`i' = num_vac_sent if (jobcode_skill==`i');
		};
	
		foreach x in construction domestic retail {;
			g num_vac_req_`x' = num_vac_req if (job_code_`x' == 1);
			g num_vac_app_`x' = num_vac_app if (job_code_`x' == 1);
			g num_vac_sent_`x' = num_vac_sent if (job_code_`x' == 1);
		};

		forval x = 1/3 {;
			g num_vac_req_skill`x' = num_vac_req if (jobcode_skill_lev == `x');
			g num_vac_app_skill`x'  = num_vac_app if (jobcode_skill_lev == `x');
			g num_vac_sent_skill`x'  = num_vac_sent if (jobcode_skill_lev == `x');
		};
	
		g jo_index_n = 1; //to count the different positions
		
	* collapse at the job order level;
		
		collapse (sum) num_vac_* jo_index_n
			(first) apr_ticket_return apr_food_free apr_accom_free apr_yr year jo_yr
			apr_sex apr_food apr_accom apr_ticket apr_medical pri prino license_no 
			ent_sysdt expdate 
			*currency* (mean) maxsalary usd_minsalary usd_maxsalary usd_minsalary_d usd_maxsalary_d, by(aprvno);
	
	* clean up;
	
		g mean_vac_sent = num_vac_sent / num_vac_req;

		la var num_vac_app "Number of vacancies approved per job order"  ;
		la var num_vac_req "Number of vacancies requested per job order"  ;
		la var num_vac_sent "Number of vacancies filled per job order"  ;
		la var mean_vac_sent "Mean job order vac fill rate"  ;
		la var maxsalary "Max salary offered on contract"; 
	
		rename jo_index_n num_jo_index;
	
		la var num_jo_index "Number of recruitment categories in job order";
		la var apr_sex "Sex of worker sought";
		la var apr_food "T&C of job order (food)";
		la var apr_accom "T&C of job order (accomodations)";
		la var apr_ticket "T&C of job order (plane ticket)";
		la var apr_medical "T&C of job order (health insurance)";
		la var apr_food_free "T&C of job order- free food";
		la var apr_accom_free "T&C of job order- free accomodations";
		la var apr_ticket_return "T&C of job order- return plane ticket";	

		la var num_vac_req_skill1 "Num HIGH-SKILL vacancies requested";
		la var num_vac_req_skill2 "Num MEDIUM-SKILL vacancies requested";
		la var num_vac_req_skill3 "Num LOW-SKILL vacancies requested";
		la var num_vac_app_skill1 "Num HIGH-SKILL vacancies approved";
		la var num_vac_app_skill2 "Num MEDIUM-SKILL vacancies approved";
		la var num_vac_app_skill3 "Num LOW-SKILL vacancies approved";
		la var num_vac_sent_skill1 "Num HIGH-SKILL vacancies filled";
		la var num_vac_sent_skill2 "Num MEDIUM-SKILL vacancies filled";
		la var num_vac_sent_skill3 "Num LOW-SKILL vacancies filled";
	
		la var num_vac_req_construction "Num vacancies requested - construction";
		la var num_vac_app_construction "Num vacancies approved - construction";
		la var num_vac_sent_construction "Num vacancies filled - construction";
		la var num_vac_req_domestic  	"Num vacancies requested - domestic";
		la var num_vac_app_domestic 	"Num vacancies approved - domestic";
		la var num_vac_sent_domestic 	"Num vacancies filled - domestic";
		la var num_vac_req_retail 		"Num vacancies requested - retail";
		la var num_vac_app_retail 		"Num vacancies approved - retail";
		la var num_vac_sent_retail		"Num vacancies filled - retail";
		
		local code "professional skilled semiskilled middle clerical unskilled domestic";
		tokenize `code';
		forval i = 1/7 {;
			la var num_vac_req_skilltype`i' "Num vacancies requested of skill type ``i''";
			la var num_vac_req_skilltype`i' "Num vacancies requested of skill type ``i''";
			la var num_vac_req_skilltype`i' "Num vacancies requested of skill type ``i''";
		};
		
	* save ;
	
		save ../dta_secure/job_order_uniq.dta, replace; 	

*******************************************************************************;
* FIRM X YEAR LEVEL DATA ON JOB ORDERS, where year is the approval year 
of the job order;

	use ../dta_secure/job_order_uniq.dta, clear; 
	
	g aprvtag = 1 ;
	g apr_medical_free = (apr_medical == 4);
	g num_vac_req_ticket_return = num_vac_req if apr_ticket_return == 1;
	g num_vac_req_food_free = num_vac_req if apr_food_free == 1;
	g num_vac_req_accom_free = num_vac_req if apr_accom_free == 1;
	g num_vac_req_medical_free = num_vac_req if apr_medical_free == 1;
	
* collapse at the firm X year level, where year is the approval year;

		collapse (sum) apr_ticket_return apr_food_free apr_accom_free apr_medical_free aprvtag
			num_vac_*  (mean) mean_vac_sent maxsalary usd_minsalary usd_maxsalary usd_minsalary_d usd_maxsalary_d 
			mean_vac_req=num_vac_req, by(license_no year);
	
* clean;
		destring, replace; 
		rename aprvtag num_job_orders;
		
		
		gen ticket_share_vac = num_vac_req_ticket_return/num_vac_req; 
		gen ticket_share_jo = apr_ticket_return/num_job_orders;
		
		gen med_share_vac = num_vac_req_medical_free/num_vac_req; 
		gen med_share_jo = apr_medical_free/num_job_orders;
		
		gen accom_share_vac = num_vac_req_accom_free/num_vac_req; 
		gen accom_share_jo = apr_accom_free/num_job_orders;
		
		gen food_share_vac = num_vac_req_food_free/num_vac_req; 
		gen food_share_jo = apr_food_free/num_job_orders;

		gen domestic_share = num_vac_req_domestic/num_vac_req;
		gen fill_rate = num_vac_sent/num_vac_req; 
		
		
		la var ticket_share_vac "Share of vacancies with return ticket provided";
		la var ticket_share_jo "Share of job orders with return ticket provided";
		la var med_share_vac "Share of vacancies with health insurance provided";
		la var med_share_jo "Share of job orders with health insurance provided";
		la var accom_share_vac "Share of vacancies with accomodations provided";
		la var accom_share_jo "Share of job orders with accomodations provided";
		la var food_share_vac "Share of vacancies with food provided";
		la var food_share_jo "Share of job orders with food provided";
		
		la var fill_rate "fill rate using sums not means across job orders";
		la var num_job_orders "Number of job orders in year";
		la var mean_vac_req "Avg. number of vacancies requested in job order"; 
		la var usd_maxsalary "Avg. max salary of job order"; 
		la var usd_minsalary "Avg. min salary of job order"; 
		la var usd_maxsalary_d "Avg. max salary of job order - 2015 dollars"; 
		la var usd_minsalary_d "Avg. min salary of job order - 2015 dollars"; 
		la var mean_vac_sent "Avg. vacancy fill rate";
		la var domestic_share "Avg. share of vacancies that are domestic work";
			

	
		save "../dta_secure/firm_joborder_yr", replace;


*******************************************************************************;

* FIRM X MONTH LEVEL DATA ON JOB ORDERS, where year is the approval year 
of the job order;
	
	#delimit;
	
	use ../dta_secure/job_order_match/job_order_match.dta, clear;
	
	*estimate job order approval month from registration date range ;
	*(more accurate than using departure date when testing on cases with known jo approval month);
	
		g reg_month = substr(reg_date, 1, 1) if substr(reg_date, 2, 1) == "/";
		replace reg_month = substr(reg_date, 1, 2) if substr(reg_date, 3, 1) == "/"; 
		destring reg_month, replace;
	
		g reg_yr = substr(reg_date, -7, 2);
		destring reg_yr, replace;
		replace reg_yr = reg_yr + 1900;
		replace reg_yr = reg_yr + 100 if reg_yr < 1919;
	
		gen reg_month_yr = ym(reg_yr, reg_month);
		format reg_month_yr %tm;
	
		bys aprvno: egen max_reg_month_yr = max(reg_month_yr);
		bys aprvno: egen min_reg_month_yr = min(reg_month_yr);
		format max_reg_month_yr %tm;
		format min_reg_month_yr %tm;
		g dif_reg_month = max_reg_month_yr - min_reg_month_yr;
	
	
		/*Earliest reg_date is the estimated job order approval month only when 
			the range of registration dates is 6 or 12 months for a given aprvno
			(these had >90% rate of matching the actual job order month when testing 
			on cases with known jo approval month) ; */
		gen month_tmp = reg_month if reg_month_yr == min_reg_month_yr;
		bys aprvno: egen jo_month_reg = max(month_tmp);
		drop month_tmp;
		
		gen yr_tmp = reg_yr if reg_month_yr == min_reg_month_yr;
		bys aprvno: egen jo_yr_reg = min(yr_tmp);
		drop yr_tmp;
			
	collapse (first) jo_month_reg max_reg_month min_reg_month dif_reg_month ent_sysdt, 
		by(aprvno jo_yr_reg);
	
	*merge with unique job orders;
	merge 1:1 aprvno using ../dta_secure/job_order_uniq.dta, gen(m_reg_month);
		drop if m_reg_month == 1;//keeps only job orders in job_order_uniq.dta;

		g aprvtag = 1 ;
	
		g jo_month = substr(ent_sysdt, 1, 1) if substr(ent_sysdt, 2, 1) == "/";
		replace jo_month = substr(ent_sysdt, 1, 2) if substr(ent_sysdt, 3, 1) == "/"; 
		destring jo_month, replace;
	
		replace jo_month = jo_month_reg if jo_month == . & jo_month_reg != .;
		replace year = jo_yr_reg if year != jo_yr_reg & jo_yr_reg !=.;
		
		rename jo_month month;
		
		gen apr_medical_free = (apr_medical == 4);
		
	save ../dta_secure/job_order_uniq_month.dta, replace;
	
* collapse at the firm X month level, where month is the approval month;
	
		foreach x in ticket_return food_free accom_free medical_free {;
			gen num_vac_req_`x' = num_vac_req if apr_`x' == 1;
			gen num_vac_app_`x' = num_vac_app if apr_`x' == 1;
			gen num_vac_sent_`x' = num_vac_sent if apr_`x' == 1;
			rename apr_`x' num_jo_`x';
		};
	
		collapse (sum) aprvtag num_jo* num_vac_* 
			(mean) mean_vac_sent maxsalary usd_minsalary usd_maxsalary usd_minsalary_d usd_maxsalary_d mean_vac_req=num_vac_req, 
			by(license_no year month);
	
* clean;
		destring, replace; 
		rename aprvtag num_job_orders;
		
		gen ticket_share_vac = num_vac_req_ticket_return/num_vac_req; 
		gen ticket_share_jo = num_jo_ticket_return/num_job_orders;
		
		gen med_share_vac = num_vac_req_medical_free/num_vac_req; 
		gen med_share_jo = num_jo_medical_free/num_job_orders;
		
		gen accom_share_vac = num_vac_req_accom_free/num_vac_req; 
		gen accom_share_jo = num_jo_accom_free/num_job_orders;
		
		gen food_share_vac = num_vac_req_food_free/num_vac_req; 
		gen food_share_jo = num_jo_food_free/num_job_orders;

		gen domestic_share = num_vac_req_domestic/num_vac_req;
		gen fill_rate = num_vac_sent/num_vac_req; 
		
		
		la var ticket_share_vac "Share of vacancies with return ticket provided";
		la var ticket_share_jo "Share of job orders with return ticket provided";
		la var med_share_vac "Share of vacancies with health insurance provided";
		la var med_share_jo "Share of job orders with health insurance provided";
		la var accom_share_vac "Share of vacancies with accomodations provided";
		la var accom_share_jo "Share of job orders with accomodations provided";
		la var food_share_vac "Share of vacancies with food provided";
		la var food_share_jo "Share of job orders with food provided";
		
		la var fill_rate "fill rate using sums not means across job orders";
		la var num_job_orders "Number of job orders in month";
		la var mean_vac_req "Avg. number of vacancies requested in job order"; 
		la var usd_maxsalary "Avg. max salary of job order"; 
		la var usd_minsalary "Avg. min salary of job order"; 
		la var usd_maxsalary_d "Avg. max salary of job order - 2015 dollars"; 
		la var usd_minsalary_d "Avg. min salary of job order - 2015 dollars"; 
		la var mean_vac_sent "Avg. vacancy fill rate";
		la var domestic_share "Avg. share of vacancies that are domestic work";
			

	
		
	drop if month ==  . ; 
	
	
		save "../dta_secure/firm_joborder_month", replace;


